#!/bin/bash
#
# vcf_merge.sh - write the job scripts for a per-chromosome VCF merge.
#
# Usage:
#   vcf_merge.sh
#
# No arguments are read. Run it from the project directory, which must
# hold vcf.list (handed to "bcftools merge -l" by the generated jobs).
# Nothing is merged or annotated here; the script only creates the tmp/
# working tree and writes three kinds of job scripts under tmp/scripts/:
#
#   split_vcf/work.<chrom>.vcf_merge.sh  merge one chromosome, then split the
#                                        result into SNP and INDEL files
#   annovar/work.<var>.chr<chrom>.sh     annotate one SNP or INDEL file
#   concat/work.concat.sh                join the per-chromosome results
#                                        into VCF/
#
# Re-running is safe: directories are created with "mkdir -p" and the job
# scripts are overwritten. (An earlier version aborted with
# "tmp file exists!" when tmp/ was already present; that guard is disabled.)
set -eo pipefail

# Every generated script refers to the launch directory by absolute path.
readonly workdir=$PWD
readonly split_dir=${workdir}/tmp/split_vcf
readonly script_dir=${workdir}/tmp/scripts
readonly out_dir=${workdir}/VCF

# One binary for every bcftools call, so all jobs run the same version.
readonly bcftools_bin=bcftools-1.6
readonly reference=/PUBLIC/database/HUMAN/genome/Human/human_g1k_v37_decoy.fasta

# Chromosomes in the order the merged outputs should follow.
chroms=({1..22} X Y)
readonly chroms

# NOTE: the heredocs below are unquoted on purpose. Plain ${...} is filled in
# now, while \$... and \\ are emitted literally for the generated script.

#######################################
# Write the job that merges one chromosome and splits it by variant type.
# Globals:   bcftools_bin, reference, workdir, split_dir, script_dir (read)
# Arguments: $1 - chromosome name (1..22, X or Y)
# Outputs:   tmp/scripts/split_vcf/work.<chrom>.vcf_merge.sh
#######################################
write_merge_script() {
  local chrom=$1
  local merged=${split_dir}/gvcf/chr${chrom}.merged.vcf.gz
  local snp_dir=${split_dir}/snp_${chrom}
  local indel_dir=${split_dir}/indel_${chrom}

  cat > "${script_dir}/split_vcf/work.${chrom}.vcf_merge.sh" <<EOF
set -eo pipefail

${bcftools_bin} merge \\
    -g "${reference}" \\
    -l "${workdir}/vcf.list" \\
    -r ${chrom} \\
    -Oz -o "${merged}"

mkdir -p "${snp_dir}"

${bcftools_bin} view -i '%TYPE=="snp"' -Oz -o "${snp_dir}/chr${chrom}.snp.merged.vcf.gz" "${merged}"
${bcftools_bin} index -tf "${snp_dir}/chr${chrom}.snp.merged.vcf.gz"
mkdir -p "${indel_dir}"
${bcftools_bin} view -i '%TYPE=="indel"' -Oz -o "${indel_dir}/chr${chrom}.indel.merged.vcf.gz" "${merged}"
${bcftools_bin} index -tf "${indel_dir}/chr${chrom}.indel.merged.vcf.gz"

echo "vcf merge splited by chromesomes completion!"
EOF
}

#######################################
# Write the annotation job for one variant type on one chromosome.
# Globals:   bcftools_bin, split_dir, script_dir (read)
# Arguments: $1 - variant type, "snp" or "indel"
#            $2 - chromosome name (1..22, X or Y)
# Outputs:   tmp/scripts/annovar/work.<var>.chr<chrom>.sh
#######################################
write_annovar_script() {
  local var=$1
  local chrom=$2
  local dir=${split_dir}/${var}_${chrom}
  local stem=chr${chrom}.${var}.merged
  local var_type
  # The ANNOVAR wrapper is given the type in upper case (SNP / INDEL).
  var_type=$(printf '%s' "${var}" | tr 'a-z' 'A-Z')

  cat > "${script_dir}/annovar/work.${var}.chr${chrom}.sh" <<EOF
set -eo pipefail
cd "${dir}"
# Sample names: the last header line, minus its first nine columns.
samples=\$(${bcftools_bin} view -h "${stem}.vcf.gz" | tail -n 1 | tr "\t" "\n" | tail -n +10)
# ===== 1 databases annotation with annovar =====
# samples is left unquoted on purpose: one argument per sample name.
sh /PUBLIC/software/HUMAN/ANNOVAR_2017Jul16/Var_annotation_disease_ANNOVAR2017Jul16_v4.7.sh \\
    -b ${var_type} \\
    -p 4 \\
    -r b37 \\
    "${dir}/${stem}.vcf.gz" \\
    \${samples}

#pyannovar \\
#-t ${var} \\
#-f vcf \\
#-th 1 \\
#-explain \\
#-o ${stem}.annovar \\
#${dir}/${stem}.vcf.gz

# ===== 2 add PubmedID and reformat HGMD =====
python /ifs/TJPROJ3/DISEASE/share/Disease/AddOMIM_HGMD/AddHGMD_OMIM_Priority_pipe4.7.py \\
    "${dir}/${stem}_sn.annovar.hg19_multianno.xls" \\
    "${dir}/${stem}.annovar.hg19_multianno_mid.xls" \\
    hg19

# ===== 3 add HPA annotation =====
python /ifs/TJPROJ3/DISEASE/share/Disease/HPA.v15/annotatExpression_for_multiannofile.py \\
    -i "${dir}/${stem}.annovar.hg19_multianno_mid.xls" \\
    -o "${dir}/${stem}.annovar.hg19_multianno.xls"

gzip -f "${dir}/${stem}.annovar.hg19_multianno.xls"

rm -f *${var}*_sn*gz *${var}*multianno.xls.bak *${var}*multianno_mid.xls *${var}*_sn*

echo annotate merged ${var} vcf done: \$(date "+%F %T")

EOF
}

#######################################
# Write the job that joins the per-chromosome VCFs and annotation tables.
# Globals:   bcftools_bin, chroms, split_dir, script_dir, out_dir (read)
# Arguments: none
# Outputs:   tmp/scripts/concat/work.concat.sh
#######################################
write_concat_script() {
  local list_dir=${script_dir}/concat
  # Spell the chromosomes out so the generated loops need no brace expansion.
  local chrom_words="${chroms[*]}"

  cat > "${list_dir}/work.concat.sh" <<EOF
set -eo pipefail
mkdir -p "${out_dir}"

for var in snp indel
do
    # List the inputs explicitly: ls would sort them as 1, 10, 11, ... 2, 20.
    for chrom in ${chrom_words}
    do
        echo "${split_dir}/\${var}_\${chrom}/chr\${chrom}.\${var}.merged.vcf.gz"
    done > "${list_dir}/\${var}_vcfs_list"

    ${bcftools_bin} concat \\
        -f "${list_dir}/\${var}_vcfs_list" \\
        -Oz -o "${out_dir}/\${var}.merged.vcf.gz"

    # Annotation table: one header line, then the rows of every chromosome.
    merged_xls="${out_dir}/\${var}.merged.annovar.hg19_multianno.xls"
    zcat "${split_dir}/\${var}_Y/chrY.\${var}.merged.annovar.hg19_multianno.xls.gz" | awk 'NR==1{print}' > "\${merged_xls}"
    for chrom in ${chrom_words}
    do
        zcat "${split_dir}/\${var}_\${chrom}/chr\${chrom}.\${var}.merged.annovar.hg19_multianno.xls.gz" | tail -n +2 >> "\${merged_xls}"
    done
    gzip -f "\${merged_xls}"
done

#rm -rf tmp
EOF
}

#######################################
# Create the working tree and write every job script.
# Globals:   chroms, workdir, split_dir, script_dir (read)
# Arguments: none
#######################################
main() {
  local chrom var

  mkdir -p \
    "${split_dir}/gvcf" \
    "${script_dir}/split_vcf" \
    "${script_dir}/annovar" \
    "${script_dir}/concat" \
    "${workdir}/tmp/log"

  # split vcf by chromosomes
  for chrom in "${chroms[@]}"; do
    write_merge_script "${chrom}"
    for var in snp indel; do
      write_annovar_script "${var}" "${chrom}"
    done
  done

  # concat
  write_concat_script
}

main "$@"